library(data.table)
library(stringr)

# Point R at the folder that holds the replication archive.
# NOTE: this path is machine-specific; edit it before running elsewhere.
setwd("C:\\Users\\drmil\\Dropbox\\PoliticalTravel\\Code and Data\\JEPS Replication Archive")

# Load the raw Study 2 (vignette) export.
responses <- fread("Study2_Raw.csv", header = TRUE, stringsAsFactors = FALSE)

# The first two rows after the column names are extra header rows rather than
# respondents, so drop them.
responses <- responses[-(1:2)]

# Keep only respondents who agreed to consent (Q31) and who did not ask for
# their data to be removed after full information was provided (Q234).
# Rows where the condition evaluates to NA are dropped as well.
responses <- responses[Q31 == "I AGREE" & Q234 != "REMOVE"]

# Attention checks: each item is scored 1 when the recorded answer is exactly
# the expected pair of selections and 0 otherwise (NA stays NA).
responses$attention1 <- as.numeric(responses$Q2 == "The Drudge Report,Google News")
responses$attention2 <- as.numeric(responses$Q38 == "You Belong with Me,Anti-Hero")

# Overall attentiveness is the number of attention checks passed.
responses$attentiveness <- with(responses, attention1 + attention2)

# recoding demographics: gender, age, education and income are carried over
# unchanged from their survey items
responses$gender <- responses$Q63
responses$age <- responses$Q64
responses$education <- responses$Q65
responses$income <- responses$Q69

# Combined race/ethnicity category built from race (Q66) and Hispanic origin
# (Q68):
#   White / Black respondents are split by Hispanic origin (*_nh / *_h),
#   Asian respondents are "asian" regardless of Q68,
#   the remaining listed race categories are pooled as "other".
# A few respondents preferred not to say whether they were Hispanic; if they
# are White or Black they cannot be placed in a *_nh / *_h cell and are left
# as a true missing value (NA), not the character string "NA".
responses$race_ethnicity <- local({
  race <- responses$Q66
  hispanic <- responses$Q68

  # Start from missing and fill in only the cells that can be classified;
  # which() skips rows where a comparison is NA.
  combined <- rep(NA_character_, length(race))
  combined[which(race == "White" & hispanic == "No")] <- "white_nh"
  combined[which(race == "White" & hispanic == "Yes")] <- "white_h"
  combined[which(race == "Black or African-American" & hispanic == "No")] <- "black_nh"
  combined[which(race == "Black or African-American" & hispanic == "Yes")] <- "black_h"
  combined[which(race == "Asian")] <- "asian"
  combined[which(race %in% c("American Indian or Alaska Native",
                             "Native Hawaiian or other Pacific Islander",
                             "Other (please specify)"))] <- "other"
  combined
})

# CloudResearch handles race and ethnicity separately for weighting, so build
# a numeric version of each.
# race: 1 = White, 2 = Black or African-American, 3 = every other listed
# category; any other answer is NA.
responses$race <- local({
  race_answers <- c("White",
                    "Black or African-American",
                    "Asian",
                    "American Indian or Alaska Native",
                    "Native Hawaiian or other Pacific Islander",
                    "Other (please specify)")
  race_values <- c(1, 2, 3, 3, 3, 3)
  race_values[match(responses$Q66, race_answers)]
})

# ethnicity: 1 = "Yes" (Hispanic), 0 = "No"; any other answer is NA.
responses$ethnicity <- c(1, 0)[match(responses$Q68, c("Yes", "No"))]

# Ideology on a 7-point scale running from 1 = "Very conservative" to
# 7 = "Very liberal". The position of the answer in the ordered label vector is
# its score; answers that match no label become NA.
responses$ideology <- as.numeric(match(
  responses$Q45,
  c("Very conservative",
    "Conservative",
    "Slightly conservative",
    "Moderate",
    "Slightly liberal",
    "Liberal",
    "Very liberal")
))

# Party identification collapsed to R / I / D / O from four survey items
# (Q70-Q73). Rules are applied top to bottom and a later rule overrides an
# earlier one for the same respondent, so their order matters.
responses$pid <- local({
  rules <- list(
    list(item = "Q72", answer = "Strong Republican",            code = "R"),
    list(item = "Q72", answer = "Not a very strong Republican", code = "R"),
    list(item = "Q73", answer = "Republican",                   code = "R"),
    list(item = "Q73", answer = "Neither",                      code = "I"),
    list(item = "Q73", answer = "Democratic",                   code = "D"),
    list(item = "Q71", answer = "Not a very strong Democrat",   code = "D"),
    list(item = "Q71", answer = "Strong Democrat",              code = "D"),
    list(item = "Q70", answer = "Other (Please specify)",       code = "O")
  )

  # Everyone starts as missing; each rule overwrites the running value only
  # where its answer matches.
  party <- rep(NA, nrow(responses))
  for (rule in rules) {
    party <- ifelse(responses[[rule$item]] == rule$answer, rule$code, party)
  }

  factor(party, levels = c("I", "O", "R", "D"))
})

# Pre-treatment interest in vacationing in each of the five states (Q49_1 to
# Q49_5), scored 1 ("Not at all interested") to 5 ("Extremely interested").
# The score is the position of the answer in the ordered label vector; answers
# that match no label become NA.
interest_levels <- c("Not at all interested",
                     "Slightly interested",
                     "Somewhat interested",
                     "Very interested",
                     "Extremely interested")
responses$CA_interest_pre <- as.numeric(match(responses$Q49_1, interest_levels))
responses$FL_interest_pre <- as.numeric(match(responses$Q49_2, interest_levels))
responses$IL_interest_pre <- as.numeric(match(responses$Q49_3, interest_levels))
responses$NY_interest_pre <- as.numeric(match(responses$Q49_4, interest_levels))
responses$NV_interest_pre <- as.numeric(match(responses$Q49_5, interest_levels))

# Treatment assignment, read off the recorded display order of the Florida
# vignette block: "Q50|Q51" is the control arm and "Q50|Q53" the treatment arm.
# Any other value is left as NA.
responses$FL_treatment <- c("Control", "Treatment")[
  match(responses$`Florida--Vignettes_DO`, c("Q50|Q51", "Q50|Q53"))
]

# Post-treatment interest in vacationing in each of the five states (Q54_1 to
# Q54_5), scored 1 ("Not at all interested") to 5 ("Extremely interested").
# The score is the position of the answer in the ordered label vector; answers
# that match no label become NA.
interest_levels <- c("Not at all interested",
                     "Slightly interested",
                     "Somewhat interested",
                     "Very interested",
                     "Extremely interested")
responses$CA_interest_post <- as.numeric(match(responses$Q54_1, interest_levels))
responses$FL_interest_post <- as.numeric(match(responses$Q54_2, interest_levels))
responses$IL_interest_post <- as.numeric(match(responses$Q54_3, interest_levels))
responses$NY_interest_post <- as.numeric(match(responses$Q54_4, interest_levels))
responses$NV_interest_post <- as.numeric(match(responses$Q54_5, interest_levels))

# Whether the respondent asked for more information about vacationing in each
# of the five states. Q55 holds the selected state names in a single string,
# so each indicator is 1 when the state's name appears in Q55 AND the
# respondent has a recorded treatment assignment, and 0 otherwise.
wants_info <- function(state_name) {
  as.numeric(str_detect(responses$Q55, state_name) & !is.na(responses$FL_treatment))
}
responses$CA_want_info <- wants_info("California")
responses$FL_want_info <- wants_info("Florida")
responses$IL_want_info <- wants_info("Illinois")
responses$NY_want_info <- wants_info("New York")
responses$NV_want_info <- wants_info("Nevada")

# Pre-post change in interest for each state: the post-treatment score minus
# the pre-treatment score, so positive values mean interest went up.
interest_delta <- function(state) {
  responses[[paste0(state, "_interest_post")]] -
    responses[[paste0(state, "_interest_pre")]]
}
responses$CA_interest_change <- interest_delta("CA")
responses$FL_interest_change <- interest_delta("FL")
responses$IL_interest_change <- interest_delta("IL")
responses$NY_interest_change <- interest_delta("NY")
responses$NV_interest_change <- interest_delta("NV")

# Reduce the data to the analysis variables, plus the four raw *_cr
# demographic columns that are still needed to build the weighting variables.
# outer() expands state x measure into CA_interest_pre, FL_interest_pre, ...,
# NY_want_info (states vary fastest within each measure).
interim_cols <- c(
  "ResponseId", "attention1", "attention2", "attentiveness",
  "gender", "age", "education", "income",
  "race", "ethnicity", "race_ethnicity", "ideology", "pid",
  "age_cr", "ethnicity_cr", "gender_cr", "race_cr",
  "FL_treatment",
  as.vector(outer(
    c("CA", "FL", "IL", "NV", "NY"),
    c("_interest_pre", "_interest_post", "_interest_change", "_want_info"),
    paste0
  ))
)
responses_sub <- responses[, interim_cols, with = FALSE]

# to construct survey weights consistent with CloudResearch's targets, we need to
# recode respondent demographic variables to be consistent with that scheme

# Age group: 1 = 18-29, 2 = 30-44, 3 = 45-59, 4 = 60 and over; NA otherwise.
# The bins are half-open ([18,30), [30,45), [45,60), [60,Inf)) so every age
# from 18 upward lands in exactly one group -- in particular 59 belongs to
# group 3 rather than falling into a gap between groups 3 and 4.
responses_sub$age_cloudresearch <- local({
  # NOTE(review): age_cr is presumably read as character because of the extra
  # header rows in the raw export; convert so the comparison is numeric rather
  # than lexicographic. Confirm against the raw file.
  age_years <- as.numeric(responses_sub$age_cr)
  as.numeric(cut(age_years,
                 breaks = c(18, 30, 45, 60, Inf),
                 right = FALSE,
                 labels = FALSE))
})

# Gender: 0 = "Man", 1 = "Woman"; any other answer is NA.
responses_sub$gender_cloudresearch <- c(0, 1)[
  match(responses_sub$gender_cr, c("Man", "Woman"))
]

# Race: 1 = White, 2 = Black or African American, 3 = every other non-missing
# value. The doubled "X,X" strings are treated the same as the single answer.
# NOTE(review): an empty-string race_cr is also coded 3 -- confirm intended.
responses_sub$race_cloudresearch <- local({
  race_cr <- responses_sub$race_cr
  race_code <- rep(3, length(race_cr))
  race_code[race_cr %in% c("White", "White,White")] <- 1
  race_code[race_cr %in% c("Black or African American",
                           "Black or African American,Black or African American")] <- 2
  race_code[is.na(race_cr)] <- NA
  race_code
})

# Ethnicity: 0 = not of Hispanic origin, 1 = any of the four "Yes, ..."
# Hispanic-origin answers; any other answer is NA.
responses_sub$ethnicity_cloudresearch <- local({
  ethnicity_cr <- responses_sub$ethnicity_cr
  hispanic_answers <- c(
    "Yes, another Hispanic, Latino, or Spanish origin (for example, Salvadoran, Dominican, Colombian, Guatemalan, Spaniard, Ecuadorian, etc.)",
    "Yes, Cuban",
    "Yes, Mexican, Mexican Am., Chicano",
    "Yes, Puerto Rican"
  )
  ethnicity_code <- rep(NA_real_, length(ethnicity_cr))
  ethnicity_code[which(ethnicity_cr == "No, not of Hispanic, Latino, or Spanish origin")] <- 0
  ethnicity_code[which(ethnicity_cr %in% hispanic_answers)] <- 1
  ethnicity_code
})

# Final column set: the raw *_cr columns are dropped in favour of their
# recoded *_cloudresearch versions. outer() expands state x measure into
# CA_interest_pre, FL_interest_pre, ..., NY_want_info (states vary fastest
# within each measure).
final_cols <- c(
  "ResponseId", "attention1", "attention2", "attentiveness",
  "gender", "age", "education", "income",
  "race", "ethnicity", "race_ethnicity", "ideology", "pid",
  "age_cloudresearch", "gender_cloudresearch",
  "race_cloudresearch", "ethnicity_cloudresearch",
  "FL_treatment",
  as.vector(outer(
    c("CA", "FL", "IL", "NV", "NY"),
    c("_interest_pre", "_interest_post", "_interest_change", "_want_info"),
    paste0
  ))
)
responses_sub <- responses_sub[, final_cols, with = FALSE]

# Write the cleaned Study 2 data to the working directory.
fwrite(responses_sub, "Study2_Final.csv", quote = "auto", na = NA)